This notebook shows the handover plots for all pathways in the MINOS microsimulation model.

Handover plots are not strictly statistical validation, but more a type of ‘sanity check’ for transitioned variables. A handover plot shows the ‘handover’ from input data to predicted outcomes, and allows us to see at a glance if the predicted outputs are following the same trends seen in the input data.

1 SETUP

Source utils for some functions.

source(here::here('minos', 'utils_datain.R'))
source(here::here('minos', 'utils_validation_vis.R'))
source(here::here('minos', 'validation', 'utils.r'))

1.1 Data

# Read raw datafiles in
raw.files <- list.files(here::here('data', 'final_US'), pattern='[0-9]{4}_US_cohort.csv', full.names = TRUE)
raw.dat <- do.call(rbind, lapply(raw.files, read.csv)) 

out.path <- here::here('output', 'default_config/')
base.dat <- read_singular_local_out(out.path, 'baseline', drop.dead = TRUE)

1.2 Constants

start.year <- 2020
save.path <- here::here("plots", "handovers", 'default')
create.if.not.exists(save.path)

shall.we.save <- FALSE

2 PATHWAYS

2.1 Income

# first figure out how to plot final_US
# start with hh_income

handover_boxplots(raw.dat, base.dat, 'hh_income')

handover_lineplots(raw.dat, base.dat, "hh_income")

# raw.income2 <- raw.dat %>% 
#               dplyr::select(pidp, time, hh_income) %>%
#               filter(pidp %in% raw.income.wave1)
# 
# base.income <-  base.income2 <- base.dat %>%
#                                 dplyr::select(pidp, time, hh_income) %>%
#                                 filter(pidp %in% raw.income.wave1)
# 
# handover_lineplots(raw.d)
# ## Try a version where final_US is limited to only those present from wave 1 
# # onwards because the sample refreshments are messing with the plot
# raw.income.wave1 <- raw.dat$pidp[raw.dat$time == 2009]
# 
# raw.income2 <- raw.dat %>% 
#   dplyr::select(pidp, time, hh_income) %>%
#   filter(pidp %in% raw.income.wave1) %>%
#   group_by(time) %>%
#   summarise(income = mean(hh_income, na.rm = TRUE)) %>%
#   mutate(source = 'final_US')
# 
# base.income2 <- base.dat %>%
#   dplyr::select(pidp, time, hh_income) %>%
#   filter(pidp %in% raw.income.wave1) %>%
#   group_by(time) %>%
#   summarise(income = mean(hh_income, na.rm = TRUE)) %>%
#   mutate(source = 'baseline_output')
# 
# income2 <- rbind(raw.income2, base.income2)
# 
# # Now plot
# ggplot(data = income2, mapping = aes(x = time, y = income, group = source, colour = source)) +
#   geom_line() +
#   geom_vline(xintercept=start.year, linetype='dotted') +
#   labs(title = 'Household Income') +
#   xlab('Year') +
#   ylab('Income')
# 
# if (shall.we.save) {
#   ggsave(filename = 'hh_income_wav1.png',
#        plot = last_plot(),
#        path = save.path)
# }
# 
# raw.income.check <- raw.dat %>% 
#   dplyr::select(pidp, time, hh_income) %>%
#   filter(pidp %in% raw.income.wave1)
# 
# base.income.check <- base.dat %>%
#   dplyr::select(pidp, time, hh_income) %>%
#   filter(pidp %in% raw.income.wave1)
# 
# rm(raw.income, base.income, income, raw.income.wave1, raw.income2, base.income2, 
#    raw.income.check, base.income.check)

2.1.1 Spaghetti

raw.inc <- raw.dat %>%
  select(pidp, age, time, hh_income)
base.inc <- base.dat %>%
  select(pidp, age, time, hh_income)

income.spag <- rbind(raw.inc, base.inc)

density_ridges(income.spag, 'hh_income',
               save=TRUE, 
               save.path=save.path)

pidp_sample <- sample(unique(income.spag$pidp), size=nrow(raw.inc)/10, replace=FALSE)
income.spag.sample <- income.spag[which(income.spag$pidp %in% pidp_sample), ]
spaghetti_plot(income.spag.sample, 'hh_income',
               save = shall.we.save,
               save.path = save.path)

spaghetti_highlight_max_plot(income.spag.sample, 'hh_income',
               save = shall.we.save,
               save.path = save.path)

rm(raw.inc, base.inc, income.spag)

2.2 SF12

# sf12 boxplot handovers
handover_boxplots(raw.dat, base.dat, 'SF_12')

handover_lineplots(raw.dat, base.dat, "SF_12")

# 
# raw.sf12 <- raw.dat %>% 
#   dplyr::select(pidp, time, SF_12) %>%
#   group_by(time) %>%
#   summarise(sf12 = mean(SF_12, na.rm = TRUE)) %>%
#   mutate(source = 'final_US')
# 
# base.sf12 <- base.dat %>%
#   dplyr::select(pidp, time, SF_12) %>%
#   group_by(time) %>%
#   summarise(sf12 = mean(SF_12, na.rm = TRUE)) %>%
#   mutate(source = 'baseline_output')
# 
# # merge before plot
# sf12 <- rbind(raw.sf12, base.sf12)
# 
# # Now plot
# ggplot(data = sf12, mapping = aes(x = time, y = sf12, group = source, colour = source)) +
#   geom_smooth() +
#   #geom_line() +
#   geom_vline(xintercept=start.year, linetype='dotted') +
#   labs(title = 'Mental Wellbeing (SF12)', subtitle = 'Full Sample') +
#   xlab('Year') +
#   ylab('SF12')
# 
# ## Try a version where final_US is limited to only those present from wave 1 
# # onwards because the sample refreshments are messing with the plot
# raw.sf12.wave1 <- raw.dat$pidp[raw.dat$time == 2009]
# 
# raw.sf12.2 <- raw.dat %>% 
#   dplyr::select(pidp, time, SF_12) %>%
#   filter(pidp %in% raw.sf12.wave1) %>%
#   group_by(time) %>%
#   summarise(sf12 = mean(SF_12, na.rm = TRUE)) %>%
#   mutate(source = 'final_US')
# 
# base.sf12.2 <- base.dat %>%
#   dplyr::select(pidp, time, SF_12) %>%
#   filter(pidp %in% raw.sf12.wave1) %>%
#   group_by(time) %>%
#   summarise(sf12 = mean(SF_12, na.rm = TRUE)) %>%
#   mutate(source = 'baseline_output')
# 
# sf12.2 <- rbind(raw.sf12.2, base.sf12.2)
# 
# # Now plot
# ggplot(data = sf12.2, mapping = aes(x = time, y = sf12, group = source, colour = source)) +
#   geom_line() +
#   geom_vline(xintercept=start.year, linetype='dotted') +
#   labs(title = 'Mental Wellbeing (SF12 MCS)') +
#   xlab('Year') +
#   ylab('SF12')
# 
# if (shall.we.save) {
#   ggsave(filename = 'SF12_wav1.png',
#        plot = last_plot(),
#        path = save.path)
# }
# 
# rm(raw.sf12, base.sf12, sf12, raw.sf12.wave1, raw.sf12.2, base.sf12.2, sf12.2)

2.2.1 Spaghetti

raw.sf12 <- raw.dat %>%
  select(pidp, age, time, SF_12)
base.sf12 <- base.dat %>%
  select(pidp, age, time, SF_12)

sf12.spag <- rbind(raw.sf12, base.sf12)

sf12.spag.sample <- sf12.spag[which(sf12.spag$pidp %in% pidp_sample), ]
spaghetti_plot(sf12.spag.sample, 'SF_12',
               save = shall.we.save,
               save.path = save.path)

spaghetti_highlight_max_plot(sf12.spag.sample, 'SF_12',
               save = shall.we.save,
               save.path = save.path)

density_ridges(sf12.spag, "SF_12", 
               save = T, 
               save.path = save.path)

rm(raw.sf12, base.sf12, sf12.spag)

2.3 Nutrition Quality

TODO: Drop negative values ONLY from waves with no data i.e. not 7,9,11

#  nutriton quality line/boxplots.
handover_boxplots(raw.dat, base.dat, 'nutrition_quality')

handover_lineplots(raw.dat, base.dat, 'nutrition_quality')

# Try a version where final_US is limited to only those present from wave 1
# onwards because the sample refreshments are messing with the plot
# raw.wave1 <- raw.dat$pidp[raw.dat$time == 2009]
# 
# raw.nut2 <- raw.dat %>%
#   dplyr::select(pidp, time, nutrition_quality) %>%
#   filter(pidp %in% raw.wave1) %>%
#   filter(nutrition_quality >= 0) %>%
#   group_by(time) %>%
#   summarise(nutrition_quality = mean(nutrition_quality, na.rm = TRUE)) %>%
#   mutate(source = 'final_US')
# 
# base.nut2 <- base.dat %>%
#   dplyr::select(pidp, time, nutrition_quality) %>%
#   filter(pidp %in% raw.wave1) %>%
#   group_by(time) %>%
#   summarise(nutrition_quality = mean(nutrition_quality, na.rm = TRUE)) %>%
#   mutate(source = 'baseline_output')
# 
# nutrition_quality2 <- rbind(raw.nut2, base.nut2)
# 
# # Now plot
# ggplot(data = nutrition_quality2, mapping = aes(x = time, y = nutrition_quality, group = source, colour = source)) +
#   geom_line() +
#   geom_vline(xintercept=start.year, linetype='dotted') +
#   labs(title = 'Nutrition Quality') +
#   xlab('Year') +
#   ylab('Nutrition Quality')
# 
# if (shall.we.save) {
#   ggsave(filename = 'nutrition_quality_wav1.png',
#        plot = last_plot(),
#        path = save.path)
# }
# 
# rm(raw.nut, base.nut, nutrition_quality, raw.wave1, raw.nut2, base.nut2,
#     nutrition_quality2)

2.3.1 Spaghetti

raw.nut <- raw.dat %>%
  select(pidp, age, time, nutrition_quality)
base.nut <- base.dat %>%
  select(pidp, age, time, nutrition_quality)

nut.spag <- rbind(raw.nut, base.nut)

spaghetti_plot(nut.spag, 'nutrition_quality',
               save = shall.we.save,
               save.path = save.path)

density_ridges(nut.spag, "nutrition_quality", 
               save = T, 
               save.path = save.path)

rm(raw.nut, base.nut, nut.spag)